## ---- Setup ----
## lmer estimation algorithm control
strict_tol <- lmerControl(optCtrl = list(xtol_abs = 1e-8,
                                         ftol_abs = 1e-8))

## ---- Data ----
## SOEP data
load("dat/proc-data/soep_imp.RData")

## ---- Shortened TSCS: AfD analyses, 2014-2018 (demeaning) ----
## Variable selection for demeaning
current_dm_vars <- soep_imp$imputations$imp1 %>%
  dplyr::select(dplyr::ends_with("_cwu")) %>%
  names() %>%
  substr(., 1, nchar(.) - 4)
additional_dm_vars <- c(
  "nbh_met_p_deutschl",
  "nbh_alq_p_quote",
  "kr_uemprate",
  "kr_foreigner"
)
dm_vars <- c(current_dm_vars, additional_dm_vars)

## Updated regional data
regional_data <- rio::import(
  "../../soep-data/soep.v38/stata/regionl.dta"
) %>%
  dplyr::filter(kkz > 0) %>%
  dplyr::select(kkz,
                syear,
                kr_uemprate,
                kr_foreigner) %>%
  dplyr::rename(year = syear) %>%
  dplyr::mutate_at(
    .vars = vars(kr_uemprate,
                 kr_foreigner),
    .funs = ~ifelse(. < 0, NA_real_, .)
  ) %>%
  dplyr::distinct() %>%
  dplyr::group_by(kkz, year) %>% 
  dplyr::summarize_all(mean) %>%
  dplyr::ungroup()

## Data management
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    dplyr::mutate(yrs_at_current_address = year - hh_at_current_address,
                  age_at_current_address = age - yrs_at_current_address) %>%
    dplyr::mutate(ltr_since_3 = as.numeric(yrs_at_current_address >= 3),
                  ltr_since_5 = as.numeric(yrs_at_current_address >= 5),
                  ltr_since_7 = as.numeric(yrs_at_current_address >= 7),
                  ltr_age_30 = as.numeric(age_at_current_address <= 30)) %>%
    dplyr::mutate(lw = as.numeric(lef == 1 | spd == 1 | gre == 1),
                  rw = as.numeric(cdu == 1 | fdp == 1 | afd == 1)) %>%
    filter(renter_no_rent == 0) %>%
    dplyr::select(
      id,
      hh_id,
      plz,
      kkz,
      year,
      year_fac,
      weight,
      starts_with("cold_rent_load"),
      starts_with("cold_rent_sqm"),
      wr_ecego,
      wr_immig,
      afd,
      vote_afd,
      gre,
      lef,
      fdp,
      cdu,
      spd,
      non,
      lw,
      rw,
      owner,
      all_of(dm_vars),
      starts_with("rent"),
      starts_with("cmr_arm"),
      starts_with("log_hinc_eq"),
      starts_with("gtyp"),
      starts_with("prop_personal_hinc"),
      starts_with("hh_prop_ecact"),
      starts_with("hh_mmb"),
      starts_with("mover"),
      starts_with("lm_part"),
      starts_with("edu"),
      starts_with("age"),
      starts_with("east"),
      starts_with("fem"),
      starts_with("east"),
      starts_with("ltr_"),
      social_housing
    ) %>%
    dplyr::filter(not(is.na(rent_umn))) %>%
    dplyr::filter(not(is.na(rent_cwu))) %>%
    dplyr::filter(not(is.na(cmr_arm_umn))) %>%
    dplyr::filter(not(is.na(cmr_arm_cwu)))
}

## Local market data
load("dat/proc-data/plz5_f_und_b.RData")
plz5_f_und_b <- plz5_f_und_b %>%
  st_set_geometry(NULL) %>%
  filter(year %in% 2005:2018) %>%
  dplyr::select(plz, year, cmr_arm, rent) %>%
  distinct() %>%
  group_by(plz) %>%
  mutate(
    cmr_arm_2005 = mean(cmr_arm[year %in% 2005:2007], na.rm = TRUE),
    cmr_arm_2018 = mean(cmr_arm[year %in% 2016:2018], na.rm = TRUE),
    cmr_arm_chg = cmr_arm_2018 - cmr_arm_2005
  ) %>%
  mutate(
    rent_2005 = mean(rent[year %in% 2005:2007], na.rm = TRUE),
    rent_2018 = mean(rent[year %in% 2016:2018], na.rm = TRUE),
    rent_chg = rent_2018 - rent_2005,
    cmr_arm_chg = ifelse(is.na(cmr_arm_chg), rent_chg, cmr_arm_chg),
    cmr_arm = ifelse(is.na(cmr_arm), rent, cmr_arm)
  ) %>%
  ungroup() %>%
  group_by(year) %>%
  mutate(
    rent_cat =
      ifelse(
        year == 2004,
        NA_integer_,
        cut(
          rent,
          breaks = quantile(rent, c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
          labels = paste0("T", 1:3)
        )
      ),
    cmr_arm_cat =
      cut(
        cmr_arm,
        breaks = quantile(cmr_arm, c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
        labels = paste0("T", 1:3)
      )
  ) %>% 
  ungroup() %>%
  dplyr::select(plz, cmr_arm_chg, rent_chg, rent_cat, cmr_arm_cat, year) %>%
  distinct() %>%
  na.omit() %>%
  mutate(
    cmr_arm_chg_cat =
      cut(
        cmr_arm_chg,
        breaks = quantile(cmr_arm_chg, c(0, 1 / 3, 2 / 3, 1)),
        labels = paste0("T", 1:3)
      ),
    rent_chg_cat =
      cut(
        rent_chg,
        breaks = quantile(rent_chg, c(0, 1 / 3, 2 / 3, 1)),
        labels = paste0("T", 1:3)
      )
  )

## Additional data management
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    # top code household rents per sqm
    mutate(
      cold_rent_sqm = case_when(
        cold_rent_sqm > 40 ~ 40,
        TRUE ~ cold_rent_sqm
      )
    ) %>% 
    group_by(year) %>%
    mutate(
      log_hinc_eq_cat =
        cut(
          log_hinc_eq,
          breaks = quantile(log_hinc_eq, c(0, 1 / 3, 2 / 3, 1)),
          labels = paste0("T", 1:3)
        )
    ) %>%
    ungroup() %>%
    mutate(
      gtyp4 = case_when(
        gtyp %in% c(
          "[1] Kernstaedte>==gr.Verdraum",
          "[2] Kernstaedte<==gr.Verdraum",
          "[9] Kernstaedte<==Verdansatz"
        ) ~ "urban",
        gtyp %in% c(
          "[7] Mi-zent.=laendl=gr.Verdraum",
          "[10] Mi-zent.=verd.=Verdansatz",
          "[12] Mi-zent.=laendl=Verdansatz",
          "[14] Mi-zent.=verd.=laendl.",
          "[16] Mi-zent.=laendl=laendl."
        ) ~ "towns",
        gtyp %in% c(
          "[3] Mi-zent.=hochverd.=gr.Verdraum",
          "[5] Mi-zent.=verd.=gr.Verdraum",
          "[4] so.Gem.=hochverd.=gr.Verdraum",
          "[6] so.Gem.=verd.=gr.Verdraum"
        ) ~ "suburban",
        gtyp %in% c(
          "[8] so.Gem.=laendl=gr.Verdraum",
          "[11] so.Gem.=verd.=Verdansatz",
          "[13] so.Gem.=laendl=Verdansatz",
          "[15] so.Gem.=verd.=laendl.",
          "[17] so.Gem.=laendl=laendl."
        ) ~ "rural",
        TRUE ~ NA_character_
      ),
      gtyp3 = ifelse(gtyp4 == "towns", "rural", gtyp4)
    ) %>%
    left_join(plz5_f_und_b,
              by = c("plz", "year")) %>%
    dplyr::filter(weight > 0)
  
  ## Repair names
  names(soep_imp$imputations[[m]]) <- 
    gsub(" ", "_", names(soep_imp$imputations[[m]]))
}

## Join county-level data, within-demeaning
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    dplyr::select(-dplyr::starts_with("kr_")) %>%
    dplyr::left_join(regional_data,
                     by = c("kkz", "year")) %>%
    dplyr::select(-ends_with("_cwu"), -ends_with("_cwu")) 
}


## 2014:2018 Data for AfD support analyses
dm_vars <- gsub(" ", "_", dm_vars)
soep_afd <- soep_imp
soep_afd$imputations <-
  lapply(soep_afd$imputations, function (imp) {
    imp %>%
      dplyr::filter(year >= 2014) %>%
      dplyr::select(-dplyr::ends_with("_cwu")) %>%
      dplyr::select(-dplyr::ends_with("_umn")) %>%
      group_by(id) %>%
      mutate_at(.vars = vars(all_of(dm_vars)),
                .funs = list(
                  umn = ~ mean(., na.rm = T),
                  cwu = ~ . - mean(., na.rm = T)
                )) %>%
      ungroup()
  })

## [2014, 2018] Data for vote choice analyses
soep_vot <- soep_imp
soep_vot$imputations <-
  lapply(soep_afd$imputations, function (imp) {
    imp %>%
      dplyr::filter(year %in% c(2014, 2018)) %>%
      dplyr::select(-dplyr::ends_with("_cwu")) %>%
      dplyr::select(-dplyr::ends_with("_umn")) %>%
      group_by(id) %>%
      mutate_at(.vars = vars(all_of(dm_vars)),
                .funs = list(
                  umn = ~ mean(., na.rm = T),
                  cwu = ~ . - mean(., na.rm = T)
                )) %>%
      ungroup()
  })

## ---- Models ----
## Outcomes
outcomes <- c(
  "afd",
  "vote_afd"
)
outcomes_aux <- c(
  "cold_rent_sqm",
  "wr_ecego"
)

## Main predictors
predictors <- predictors_aux <- c("cmr_arm")

## Renter controls
rent_vars <- rent_vars_aux <- c(NA_character_,
                                "cold_rent_sqm")

## Moderators
individual_moderators <-
  individual_moderators_aux <- c(NA_character_,
                                 "log_hinc_eq",
                                 "log_hinc_eq_cat")
regional_moderators <- c(
  NA_character_,
  "gtyp3",
  "[_pred_]_cat",
  "[_pred_]_chg_cat"
)
regional_moderators_aux <- c(
  NA_character_,
  "gtyp3"
)

## Subsets
subset_owner <- subset_owner_aux <- c("owner == 0",
                                      "owner == 1")
subset_ltr <- c(NA_character_,
                "ltr_since_3 == 0",
                "ltr_since_5 == 0",
                "ltr_since_3 == 1",
                "ltr_since_5 == 1")
subset_ltr_aux <- "ltr_since_5 == 1"

## Model formula components
predictor_terms <- predictor_terms_aux <- 
  c("[_pred_]_cwu", "[_pred_]_umn")

renter_control_terms <- renter_control_terms_aux <- 
  c("[_rent_var_]_cwu", "[_rent_var_]_umn")

common_control_terms <-
  list(
    "none_year_fe" = "year_fac",
    "full" = c(
      "log_hinc_eq_cwu",
      "log_hinc_eq_umn",
      "prop_personal_hinc_cwu",
      "prop_personal_hinc_umn",
      "hh_prop_ecact_cwu",
      "hh_prop_ecact_umn",
      "hh_mmb_cwu",
      "hh_mmb_umn",
      "lm_part_Atypical_umn",
      "lm_part_Inactive_umn",
      "lm_part_Unemployed_umn",
      "lm_part_In_Education_umn",
      "lm_part_Pensioners_umn",
      "lm_part_Atypical_cwu",
      "lm_part_Inactive_cwu",
      "lm_part_Unemployed_cwu",
      "lm_part_In_Education_cwu",
      "lm_part_Pensioners_cwu",
      "age",
      "east",
      "fem",
      "edu5",
      "gtyp3",
      "year_fac"
    )
    ,
    "full_plus1" = c(
      "log_hinc_eq_cwu",
      "log_hinc_eq_umn",
      "prop_personal_hinc_cwu",
      "prop_personal_hinc_umn",
      "hh_prop_ecact_cwu",
      "hh_prop_ecact_umn",
      "hh_mmb_cwu",
      "hh_mmb_umn",
      "lm_part_Atypical_umn",
      "lm_part_Inactive_umn",
      "lm_part_Unemployed_umn",
      "lm_part_In_Education_umn",
      "lm_part_Pensioners_umn",
      "lm_part_Atypical_cwu",
      "lm_part_Inactive_cwu",
      "lm_part_Unemployed_cwu",
      "lm_part_In_Education_cwu",
      "lm_part_Pensioners_cwu",
      "age",
      "east",
      "fem",
      "edu5",
      "gtyp3",
      "year_fac",
      "kr_foreigner_cwu",
      "kr_uemprate_cwu",
      "kr_foreigner_umn",
      "kr_uemprate_umn"
    )
  )

common_control_terms_aux <- common_control_terms["full"]

## ---- Compile model code ----
model_formula <-
  'mod <- lmer(
  [_yvar_] ~
  [_xvars_](1 | id) + (1 | plz) + (1 | kkz),
  data = [_data_],
  subset = [_subset_],
  control = strict_tol,
  weights = weight
)'

model_formula_list_main <- list()
counter <- 0L
for (outcome in outcomes) {
  if (outcome %in% c("cold_rent_load",
                     "cold_rent_sqm")) {
    subset_owner_local <- subset_owner[1]
  } else {
    subset_owner_local <- subset_owner
  }
  for (predictor in predictors) {
    for (moderator in individual_moderators) {
      for (regional in regional_moderators) {
        for (owner in subset_owner_local) {
          if (owner == "owner == 1" |
              outcome %in% c("cold_rent_load",
                             "cold_rent_sqm")) {
            rent_vars_local <- NA_character_
          } else {
            rent_vars_local <- rent_vars
          }
          
          for (ltr in subset_ltr) {
            for (rent_var in rent_vars_local) {
              for (controls in names(common_control_terms)) {
                ## Counter
                counter <- counter + 1L
                
                ## Covariates
                if (is.na(moderator) & is.na(regional)) {
                  xvars <-
                    collapse_plus(predictor_terms) %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      gsub(
                        "\\[\\_rent\\_var\\_\\]",
                        rent_var,
                        collapse_plus(renter_control_terms)
                      )
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                } else if (not(is.na(moderator)) & is.na(regional)) {
                  xvars <-
                    sapply(predictor_terms, collapse_by, moderator) %>%
                    collapse_plus() %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      sapply(renter_control_terms,
                             collapse_by,
                             moderator) %>%
                      collapse_plus() %>%
                      gsub("\\[\\_rent\\_var\\_\\]",
                           rent_var,
                           .)
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                } else if (is.na(moderator) & not(is.na(regional))) {
                  xvars <-
                    sapply(predictor_terms, collapse_by, regional) %>%
                    collapse_plus() %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      sapply(renter_control_terms,
                             collapse_by,
                             regional) %>%
                      collapse_plus() %>%
                      gsub("\\[\\_rent\\_var\\_\\]",
                           rent_var,
                           .) %>%
                      gsub("\\[\\_pred\\_\\]",
                           predictor,
                           .)
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                } else if (not(is.na(moderator)) &
                           not(is.na(regional))) {
                  xvars <-
                    sapply(predictor_terms,
                           collapse_by,
                           collapse_by(moderator, regional)) %>%
                    collapse_plus() %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      sapply(
                        renter_control_terms,
                        collapse_by,
                        collapse_by(moderator, regional)
                      ) %>%
                      collapse_plus() %>%
                      gsub("\\[\\_rent\\_var\\_\\]",
                           rent_var,
                           .) %>%
                      gsub("\\[\\_pred\\_\\]",
                           predictor,
                           .)
                    
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                }
                
                ## Data
                if (outcome == "afd") {
                  data <- "soep_afd$imputations[[m]]"
                  
                } else if (outcome == "vote_afd") {
                  data <- "soep_vot$imputations[[m]]"
                } else {
                  data <- "soep_imp$imputations[[m]]"
                }

                if (not(is.na(owner))) {
                  data <- paste(data,
                                paste0("filter(", owner, ")"),
                                sep = " %>% \n    ")
                }

                ## Combine
                model_formula_local <- model_formula %>%
                  gsub("\\[\\_yvar\\_\\]", outcome, .) %>%
                  gsub("\\[\\_xvars\\_\\]",
                       xvars,
                       .) %>%
                  gsub("\\[\\_data\\_\\]",
                       data,
                       .) %>%
                  gsub("\\[\\_subset\\_\\]",
                       ifelse(is.na(ltr), "NULL", ltr),
                       .)
                
                ## Return
                model_formula_list_main[[counter]] <-
                  list(
                    outcome = outcome,
                    predictor = predictor,
                    moderator = moderator,
                    regional = gsub("\\[\\_pred\\_\\]",
                                    predictor,
                                    regional),
                    owner = owner,
                    subset = ltr,
                    rent_var = rent_var,
                    controls = controls,
                    formula = model_formula_local
                  )
              }
            }
          }
        }
      }
    }
  }
}

## Auxiliary analyses
model_formula_list_aux <- list()
counter <- 0L
for (outcome in outcomes_aux) {
  if (outcome %in% c("cold_rent_load",
                     "cold_rent_sqm")) {
    subset_owner_local <- subset_owner[1]
  } else {
    subset_owner_local <- subset_owner
  }
  for (predictor in predictors_aux) {
    for (moderator in individual_moderators_aux) {
      for (regional in regional_moderators_aux) {
        for (owner in subset_owner_local) {
          if (owner == "owner == 1" |
              outcome %in% c("cold_rent_load",
                             "cold_rent_sqm")) {
            rent_vars_local <- NA_character_
          } else {
            rent_vars_local <- rent_vars_aux
          }
          
          for (ltr in subset_ltr_aux) {
            for (rent_var in rent_vars_local) {
              for (controls in names(common_control_terms_aux)) {
                ## Counter
                counter <- counter + 1L
                
                ## Covariates
                if (is.na(moderator) & is.na(regional)) {
                  xvars <-
                    collapse_plus(predictor_terms_aux) %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms_aux[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      gsub(
                        "\\[\\_rent\\_var\\_\\]",
                        rent_var,
                        collapse_plus(renter_control_terms_aux)
                      )
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                } else if (not(is.na(moderator)) & is.na(regional)) {
                  xvars <-
                    sapply(predictor_terms_aux, collapse_by, moderator) %>%
                    collapse_plus() %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms_aux[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      sapply(renter_control_terms_aux,
                             collapse_by,
                             moderator) %>%
                      collapse_plus() %>%
                      gsub("\\[\\_rent\\_var\\_\\]",
                           rent_var,
                           .)
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                } else if (is.na(moderator) & not(is.na(regional))) {
                  xvars <-
                    sapply(predictor_terms_aux, collapse_by, regional) %>%
                    collapse_plus() %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms_aux[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      sapply(renter_control_terms_aux,
                             collapse_by,
                             regional) %>%
                      collapse_plus() %>%
                      gsub("\\[\\_rent\\_var\\_\\]",
                           rent_var,
                           .) %>%
                      gsub("\\[\\_pred\\_\\]",
                           predictor,
                           .)
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                } else if (not(is.na(moderator)) &
                           not(is.na(regional))) {
                  xvars <-
                    sapply(predictor_terms_aux,
                           collapse_by,
                           collapse_by(moderator, regional)) %>%
                    collapse_plus() %>%
                    gsub("\\[\\_pred\\_\\]",
                         predictor,
                         .)
                  
                  if (not(controls == "none")) {
                    xvars <- paste0(xvars,
                                    collapse_plus(common_control_terms_aux[[controls]]))
                  }
                  
                  if (not(is.na(rent_var))) {
                    renter_controls_local <-
                      sapply(
                        renter_control_terms_aux ,
                        collapse_by,
                        collapse_by(moderator, regional)
                      ) %>%
                      collapse_plus() %>%
                      gsub("\\[\\_rent\\_var\\_\\]",
                           rent_var,
                           .) %>%
                      gsub("\\[\\_pred\\_\\]",
                           predictor,
                           .)
                    
                    xvars <- paste0(xvars, renter_controls_local)
                  }
                }
                
                ## Data
                if (outcome == "afd") {
                  data <- "soep_afd$imputations[[m]]"
                  
                } else if (outcome == "vote_afd") {
                  data <- "soep_vot$imputations[[m]]"
                } else {
                  data <- "soep_afd$imputations[[m]]"
                }
                
                if (not(is.na(owner))) {
                  data <- paste(data,
                                paste0("filter(", owner, ")"),
                                sep = " %>% \n    ")
                }

                ## Combine
                model_formula_local <- model_formula %>%
                  gsub("\\[\\_yvar\\_\\]", outcome, .) %>%
                  gsub("\\[\\_xvars\\_\\]",
                       xvars,
                       .) %>%
                  gsub("\\[\\_data\\_\\]",
                       data,
                       .) %>%
                  gsub("\\[\\_subset\\_\\]",
                       ifelse(is.na(ltr), "NULL", ltr),
                       .)
                
                ## Return
                model_formula_list_aux[[counter]] <-
                  list(
                    outcome = outcome,
                    predictor = predictor,
                    moderator = moderator,
                    regional = gsub("\\[\\_pred\\_\\]",
                                    predictor,
                                    regional),
                    owner = owner,
                    subset = ltr,
                    rent_var = rent_var,
                    controls = controls,
                    formula = model_formula_local
                  )
              }
            }
          }
        }
      }
    }
  }
}

## ---- Estimation (main analyses) ----
dir.create("est/stayer_analyses")

focal_analyses <-
  c(
    209,
    224,
    210,
    225,
    197,
    218,
    191,
    215,
    389,
    404,
    203,
    221,
    185,
    212,
    254,
    255,
    269,
    434,
    299,
    344,
    749,
    764,
    794,
    809,
    386
  )

## Capture start time
analysis <- 1
ptm <- proc.time()[3]
#for (analysis in analysis:length(model_formula_list_main)) {
for (analysis in focal_analyses) {
  ## Extract information for prediction
  mfx_vars <- 
    na.omit(unlist(model_formula_list_main[[analysis]][c("predictor", "rent_var")]))
  mfx_vars <- paste(rep(mfx_vars, each = 2L), c("cwu", "umn"), sep = "_")
  mod_var <- na.omit(model_formula_list_main[[analysis]]$moderator)
  reg_var <- na.omit(model_formula_list_main[[analysis]]$regional)
  
  ## Estimate model in parallel
  current_formula <- model_formula_list_main[[analysis]]$formula
  
  ## Set up cluster for parallelized computation across imputations
  cl <- makeCluster(length(soep_imp$imputations),
                    outfile = "est/stayer_analyses_main.txt",
                    type = "FORK")
  
  ## Model estimation
  est <- parLapply(cl,
                   seq_along(soep_imp$imputations),
                   function (m) {
                     eval(parse(text = current_formula))
                   })
  
  ## Exit parallel computation
  stopCluster(cl)
  
  ## Model data
  dat <- est[[1]]@frame %>%
    droplevels()
  
  ## Descriptives
  get_descriptives(
    dat,
    mfx_vars = mfx_vars,
    mod_var = mod_var,
    reg_var = reg_var,
    num_breaks = 31L
  ) %>%
    write.csv(paste0(
      "est/stayer_analyses/",
      "descriptives",
      analysis,
      "_main.csv"
    ))
  
  ## General and variables-specific prediction values
  xmeans <- bind_cols(
    dat %>%
      summarize_if(is.numeric, median, na.rm = TRUE),
    dat %>%
      mutate_if(is.character, as.factor) %>%
      summarize_if(is.factor, get_mode)
  )
  if ("log_hinc_eq" %in% names(dat))
    log_hinc_eq <- seq(
      quantile(dat$log_hinc_eq, .0005, na.rm = TRUE),
      quantile(dat$log_hinc_eq, .9995, na.rm = TRUE),
      length.out = 21L
    )
  if ("log_hinc_eq_cat" %in% names(dat))
    log_hinc_eq_cat <- levels(dat$log_hinc_eq_cat)
  if ("gtyp3" %in% names(dat))
    gtyp3 <- levels(as.factor(dat$gtyp3))
  if ("rent_cat" %in% names(dat))
    rent_cat <- levels(dat$rent_cat)
  if ("rent_chg_cat" %in% names(dat))
    rent_chg_cat <- levels(dat$rent_chg_cat)
  if ("cmr_arm_cat" %in% names(dat))
    cmr_arm_cat <- levels(dat$cmr_arm_cat)
  if ("cmr_arm_chg_cat" %in% names(dat))
    cmr_arm_chg_cat <- levels(dat$cmr_arm_chg_cat)
  
  ## Prediction data
  if (length(mod_var) == 0 & length(reg_var) == 0) {
    pred_data <- xmeans
    at_values <- NULL
  } else if (length(mod_var) > 0 & length(reg_var) == 0) {
    pred_data <- crossing(!!as.name(mod_var),
                          xmeans,
                          .name_repair = ~ make.names(., unique = TRUE))
    at_values <- list(get(eval(mod_var)))
    names(at_values) <- mod_var
  } else if (length(mod_var) == 0 & length(reg_var) > 0) {
    pred_data <- crossing(
      !!as.name(reg_var),
      xmeans,
      .name_repair = ~ make.names(., unique = TRUE)
    )
    at_values <- list(get(eval(reg_var)))
    names(at_values) <- reg_var
  } else if (length(mod_var) > 0 & length(reg_var) > 0) {
    pred_data <- crossing(!!as.name(mod_var),
                          !!as.name(reg_var),
                          xmeans,
                          .name_repair = ~ make.names(., unique = TRUE)
    )
    at_values <- list(get(eval(mod_var)),
                      get(eval(reg_var)))
    names(at_values) <- c(mod_var, reg_var)
  }
  
  ## Set up cluster for parallelized computation across imputations
  cl <- makeCluster(length(soep_imp$imputations),
                    outfile = "est/stayer_analyses.txt",
                    type = "FORK")
  
  ## Marginal effects
  mfx <- parLapply(cl,
                   seq_along(est),
                   function (m) {
                     margins(
                       est[[m]],
                       data = pred_data,
                       variables = mfx_vars,
                       at = at_values,
                       vce = "delta",
                       allow.new.levels = TRUE
                     ) %>%
                       summary()
                   })
  
  ## Exit parallel computation
  stopCluster(cl)
  
  ## Export output
  # Model matrix (stacked)
  mod_summary <- lapply(est, summary)
  
  # Estimates
  data.frame(
    model_id = analysis,
    outcome = model_formula_list_main[[analysis]]$outcome,
    predictor = model_formula_list_main[[analysis]]$predictor,
    moderator = model_formula_list_main[[analysis]]$moderator,
    regional =  model_formula_list_main[[analysis]]$regional,
    owner = model_formula_list_main[[analysis]]$owner,
    spec = model_formula_list_main[[analysis]]$subset,
    rent_var = model_formula_list_main[[analysis]]$rent_var,
    controls = model_formula_list_main[[analysis]]$controls,
    N_obs = nrow(est[[1]]@pp$X),
    N_id = nlevels(est[[1]]@flist$id),
    N_plz = nlevels(est[[1]]@flist$plz),
    N_kkz = nlevels(est[[1]]@flist$kkz),
    convergence_imp1 = est[[1]]@optinfo$conv$opt,
    convergence_imp2 = est[[2]]@optinfo$conv$opt,
    convergence_imp3 = est[[3]]@optinfo$conv$opt,
    convergence_imp4 = est[[4]]@optinfo$conv$opt,
    convergence_imp5 = est[[5]]@optinfo$conv$opt,
    sigma_res_imp1 = mod_summary[[1]]$sigma,
    sigma_id_imp1  = attr(mod_summary[[1]]$varcor$id, "stddev"),
    sigma_plz_imp1 = attr(mod_summary[[1]]$varcor$plz, "stddev"),
    sigma_kkz_imp1 = attr(mod_summary[[1]]$varcor$kkz, "stddev"),
    sigma_res_imp2 = mod_summary[[2]]$sigma,
    sigma_id_imp2  = attr(mod_summary[[2]]$varcor$id, "stddev"),
    sigma_plz_imp2 = attr(mod_summary[[2]]$varcor$plz, "stddev"),
    sigma_kkz_imp2 = attr(mod_summary[[2]]$varcor$kkz, "stddev"),
    sigma_res_imp3 = mod_summary[[3]]$sigma,
    sigma_id_imp3  = attr(mod_summary[[3]]$varcor$id, "stddev"),
    sigma_plz_imp3 = attr(mod_summary[[3]]$varcor$plz, "stddev"),
    sigma_kkz_imp3 = attr(mod_summary[[3]]$varcor$kkz, "stddev"),
    sigma_res_imp4 = mod_summary[[4]]$sigma,
    sigma_id_imp4  = attr(mod_summary[[4]]$varcor$id, "stddev"),
    sigma_plz_imp4 = attr(mod_summary[[4]]$varcor$plz, "stddev"),
    sigma_kkz_imp4 = attr(mod_summary[[4]]$varcor$kkz, "stddev"),
    sigma_res_imp5 = mod_summary[[5]]$sigma,
    sigma_id_imp5  = attr(mod_summary[[5]]$varcor$id, "stddev"),
    sigma_plz_imp5 = attr(mod_summary[[5]]$varcor$plz, "stddev"),
    sigma_kkz_imp5 = attr(mod_summary[[5]]$varcor$kkz, "stddev")
  ) %>%
    write.csv(paste0(
      "est/stayer_analyses/",
      "model_info",
      analysis,
      "_main.csv"
    ))
  
  model_coefficients <- cbind(
    data.frame(
      model_id = analysis,
      x_names = names(fixef(est[[1]])),
      b_imp1 = fixef(est[[1]]),
      b_imp2 = fixef(est[[2]]),
      b_imp3 = fixef(est[[3]]),
      b_imp4 = fixef(est[[4]]),
      b_imp5 = fixef(est[[5]])
    ),
    as.data.frame(as.matrix(vcov(est[[1]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp1")),
    as.data.frame(as.matrix(vcov(est[[2]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp2")),
    as.data.frame(as.matrix(vcov(est[[3]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp3")),
    as.data.frame(as.matrix(vcov(est[[4]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp4")),
    as.data.frame(as.matrix(vcov(est[[5]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp5"))
  ) %>%
    write.csv(paste0(
      "est/stayer_analyses/",
      "model_coefficients",
      analysis,
      "_main.csv"
    ))
  
  pool_mfx_summary(mfx) %>%
    as_tibble() %>%
    mutate(model_id = analysis) %>%
    relocate(model_id, .before = factor) %>%
    write.csv(paste0(
      "est/stayer_analyses/",
      "marginal_effects",
      analysis,
      "_main.csv"
    ))
  
  ## Clear memory
  rm(est, mod_summary, mfx, dat)
  gc(reset = TRUE)
  
  ## Progress
  print(paste0(
    "Analysis: ",
    analysis,
    ". Time elapsed: ",
    round((proc.time()[3] - ptm) / 60, 2),
    " minutes."
  ))
}


## ---- Collect output ----
model_files <- list.files("est/stayer_analyses")
model_info_files_main <-
  model_files[startsWith(model_files, "model_info") &
                endsWith(model_files, "_main.csv")]
model_coefficients_files_main <-
  model_files[startsWith(model_files, "model_coefficients") &
                endsWith(model_files, "_main.csv")]
marginal_effects_files_main <-
  model_files[startsWith(model_files, "marginal_effects") &
                endsWith(model_files, "_main.csv")]
descriptives_files_main <-
  model_files[startsWith(model_files, "descriptives") &
                endsWith(model_files, "_main.csv")]

## ---- Main analyses ----
## Model info
model_info <- data.frame()
for (file in model_info_files_main) {
  model_info <- model_info %>%
    bind_rows(read.csv(paste0("est/stayer_analyses/",
                              file)))
}

model_info <- model_info %>%
  dplyr::select(-X) %>%
  arrange(model_id)

model_info %>%
  write.csv("est/stayer_analyses_model_info_main.csv")


## Model coefficients
model_coefficients <- data.frame()
for (file in model_coefficients_files_main) {
  model_coefficients <- model_coefficients %>%
    bind_rows(read.csv(paste0("est/stayer_analyses/",
                              file)))
}

model_coefficients <- model_coefficients %>%
  dplyr::select(-X) %>%
  arrange(model_id)

model_coefficients %>%
  write.csv("est/stayer_analyses_model_coefficients_main.csv")

## Marginal effects
marginal_effects <- data.frame()
for (file in marginal_effects_files_main) {
  marginal_effects <- marginal_effects %>%
    bind_rows(read.csv(paste0("est/stayer_analyses/",
                              file)))
}

marginal_effects <- marginal_effects %>%
  dplyr::select(-X) %>%
  arrange(model_id)

marginal_effects %>%
  write.csv("est/stayer_analyses_marginal_effects_main.csv")

## Descriptives
descriptives <- data.frame()
for (file in descriptives_files_main) {
  descriptives <- descriptives %>%
    bind_rows(read.csv(paste0("est/stayer_analyses/",
                              file)) %>%
                mutate(model_id = readr::parse_number(file)))
}

descriptives <- descriptives %>%
  dplyr::select(-X) %>%
  arrange(model_id)

descriptives %>%
  write.csv("est/stayer_analyses_descriptives_main.csv")


## ---- Estimation (auxiliary analyses) ----
dir.create("est/stayer_analyses_aux")

## Capture start time
focal_analyses <- c(5, 8, 19, 20)
ptm <- proc.time()[3]
for (analysis in focal_analyses) {
  ## Extract information for prediction
  mfx_vars <- 
    na.omit(unlist(model_formula_list_aux[[analysis]][c("predictor", "rent_var")]))
  mfx_vars <- paste(rep(mfx_vars, each = 2L), c("cwu", "umn"), sep = "_")
  mod_var <- na.omit(model_formula_list_aux[[analysis]]$moderator)
  reg_var <- na.omit(model_formula_list_aux[[analysis]]$regional)
  
  ## Estimate model in parallel
  current_formula <- model_formula_list_aux[[analysis]]$formula
  
  ## Set up cluster for parallelized computation across imputations
  cl <- makeCluster(length(soep_imp$imputations),
                    outfile = "est/stayer_analyses_aux.txt",
                    type = "FORK")
  
  ## Model estimation
  est <- parLapply(cl,
                   seq_along(soep_imp$imputations),
                   function (m) {
                     eval(parse(text = current_formula))
                   })
  
  ## Exit parallel computation
  stopCluster(cl)
  
  ## Model data
  dat <- est[[1]]@frame %>%
    droplevels()
  
  ## Descriptives
  get_descriptives(
    dat,
    mfx_vars = mfx_vars,
    mod_var = mod_var,
    reg_var = reg_var,
    num_breaks = 31L
  ) %>%
    write.csv(paste0(
      "est/stayer_analyses_aux/",
      "descriptives",
      analysis,
      "_aux.csv"
    ))
  
  ## General and variables-specific prediction values
  xmeans <- bind_cols(
    dat %>%
      summarize_if(is.numeric, median, na.rm = TRUE),
    dat %>%
      mutate_if(is.character, as.factor) %>%
      summarize_if(is.factor, get_mode)
  )
  if ("log_hinc_eq" %in% names(dat))
    log_hinc_eq <- seq(
      quantile(dat$log_hinc_eq, .0005, na.rm = TRUE),
      quantile(dat$log_hinc_eq, .9995, na.rm = TRUE),
      length.out = 21L
    )
  if ("log_hinc_eq_cat" %in% names(dat))
    log_hinc_eq_cat <- levels(dat$log_hinc_eq_cat)
  if ("gtyp3" %in% names(dat))
    gtyp3 <- levels(as.factor(dat$gtyp3))
  if ("rent_cat" %in% names(dat))
    rent_cat <- levels(dat$rent_cat)
  if ("rent_chg_cat" %in% names(dat))
    rent_chg_cat <- levels(dat$rent_chg_cat)
  if ("cmr_arm_cat" %in% names(dat))
    cmr_arm_cat <- levels(dat$cmr_arm_cat)
  if ("cmr_arm_chg_cat" %in% names(dat))
    cmr_arm_chg_cat <- levels(dat$cmr_arm_chg_cat)
  
  ## Prediction data
  if (length(mod_var) == 0 & length(reg_var) == 0) {
    pred_data <- xmeans
    at_values <- NULL
  } else if (length(mod_var) > 0 & length(reg_var) == 0) {
    pred_data <- crossing(!!as.name(mod_var),
                          xmeans,
                          .name_repair = ~ make.names(., unique = TRUE))
    at_values <- list(get(eval(mod_var)))
    names(at_values) <- mod_var
  } else if (length(mod_var) == 0 & length(reg_var) > 0) {
    pred_data <- crossing(
      !!as.name(reg_var),
      xmeans,
      .name_repair = ~ make.names(., unique = TRUE)
    )
    at_values <- list(get(eval(reg_var)))
    names(at_values) <- reg_var
  } else if (length(mod_var) > 0 & length(reg_var) > 0) {
    pred_data <- crossing(!!as.name(mod_var),
                          !!as.name(reg_var),
                          xmeans,
                          .name_repair = ~ make.names(., unique = TRUE)
    )
    at_values <- list(get(eval(mod_var)),
                      get(eval(reg_var)))
    names(at_values) <- c(mod_var, reg_var)
  }
  
  ## Set up cluster for parallelized computation across imputations
  cl <- makeCluster(length(soep_imp$imputations),
                    outfile = "est/stayer_analyses_aux.txt",
                    type = "FORK")
  
  ## Marginal effects
  mfx <- parLapply(cl,
                   seq_along(est),
                   function (m) {
                     margins(
                       est[[m]],
                       data = pred_data,
                       variables = mfx_vars,
                       at = at_values,
                       vce = "delta",
                       allow.new.levels = TRUE
                     ) %>%
                       summary()
                   })
  
  ## Exit parallel computation
  stopCluster(cl)
  
  ## Export output
  # Model matrix (stacked)
  mod_summary <- lapply(est, summary)
  
  # Estimates
  data.frame(
    model_id = analysis,
    outcome = model_formula_list_aux[[analysis]]$outcome,
    predictor = model_formula_list_aux[[analysis]]$predictor,
    moderator = model_formula_list_aux[[analysis]]$moderator,
    regional =  model_formula_list_aux[[analysis]]$regional,
    owner = model_formula_list_aux[[analysis]]$owner,
    spec = model_formula_list_aux[[analysis]]$subset,
    rent_var = model_formula_list_aux[[analysis]]$rent_var,
    controls = model_formula_list_aux[[analysis]]$controls,
    N_obs = nrow(est[[1]]@pp$X),
    N_id = nlevels(est[[1]]@flist$id),
    N_plz = nlevels(est[[1]]@flist$plz),
    N_kkz = nlevels(est[[1]]@flist$kkz),
    convergence_imp1 = est[[1]]@optinfo$conv$opt,
    convergence_imp2 = est[[2]]@optinfo$conv$opt,
    convergence_imp3 = est[[3]]@optinfo$conv$opt,
    convergence_imp4 = est[[4]]@optinfo$conv$opt,
    convergence_imp5 = est[[5]]@optinfo$conv$opt,
    sigma_res_imp1 = mod_summary[[1]]$sigma,
    sigma_id_imp1  = attr(mod_summary[[1]]$varcor$id, "stddev"),
    sigma_plz_imp1 = attr(mod_summary[[1]]$varcor$plz, "stddev"),
    sigma_kkz_imp1 = attr(mod_summary[[1]]$varcor$kkz, "stddev"),
    sigma_res_imp2 = mod_summary[[2]]$sigma,
    sigma_id_imp2  = attr(mod_summary[[2]]$varcor$id, "stddev"),
    sigma_plz_imp2 = attr(mod_summary[[2]]$varcor$plz, "stddev"),
    sigma_kkz_imp2 = attr(mod_summary[[2]]$varcor$kkz, "stddev"),
    sigma_res_imp3 = mod_summary[[3]]$sigma,
    sigma_id_imp3  = attr(mod_summary[[3]]$varcor$id, "stddev"),
    sigma_plz_imp3 = attr(mod_summary[[3]]$varcor$plz, "stddev"),
    sigma_kkz_imp3 = attr(mod_summary[[3]]$varcor$kkz, "stddev"),
    sigma_res_imp4 = mod_summary[[4]]$sigma,
    sigma_id_imp4  = attr(mod_summary[[4]]$varcor$id, "stddev"),
    sigma_plz_imp4 = attr(mod_summary[[4]]$varcor$plz, "stddev"),
    sigma_kkz_imp4 = attr(mod_summary[[4]]$varcor$kkz, "stddev"),
    sigma_res_imp5 = mod_summary[[5]]$sigma,
    sigma_id_imp5  = attr(mod_summary[[5]]$varcor$id, "stddev"),
    sigma_plz_imp5 = attr(mod_summary[[5]]$varcor$plz, "stddev"),
    sigma_kkz_imp5 = attr(mod_summary[[5]]$varcor$kkz, "stddev")
  ) %>%
    write.csv(paste0(
      "est/stayer_analyses_aux/",
      "model_info",
      analysis,
      "_aux.csv"
    ))
  
  model_coefficients <- cbind(
    data.frame(
      model_id = analysis,
      x_names = names(fixef(est[[1]])),
      b_imp1 = fixef(est[[1]]),
      b_imp2 = fixef(est[[2]]),
      b_imp3 = fixef(est[[3]]),
      b_imp4 = fixef(est[[4]]),
      b_imp5 = fixef(est[[5]])
    ),
    as.data.frame(as.matrix(vcov(est[[1]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp1")),
    as.data.frame(as.matrix(vcov(est[[2]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp2")),
    as.data.frame(as.matrix(vcov(est[[3]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp3")),
    as.data.frame(as.matrix(vcov(est[[4]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp4")),
    as.data.frame(as.matrix(vcov(est[[5]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp5"))
  ) %>%
    write.csv(paste0(
      "est/stayer_analyses_aux/",
      "model_coefficients",
      analysis,
      "_aux.csv"
    ))
  
  pool_mfx_summary(mfx) %>%
    as_tibble() %>%
    mutate(model_id = analysis) %>%
    relocate(model_id, .before = factor) %>%
    write.csv(paste0(
      "est/stayer_analyses_aux/",
      "marginal_effects",
      analysis,
      "_aux.csv"
    ))
  
  ## Clear memory
  rm(est, mod_summary, mfx, dat)
  gc(reset = TRUE)
  
  ## Progress
  print(paste0(
    "Analysis: ",
    analysis,
    ". Time elapsed: ",
    round((proc.time()[3] - ptm) / 60, 2),
    " minutes."
  ))
}


## ---- Collect output ----
model_files <- list.files("est/stayer_analyses_aux")
model_info_files_aux <-
  model_files[startsWith(model_files, "model_info") &
                endsWith(model_files, "_aux.csv")]
model_coefficients_files_aux <-
  model_files[startsWith(model_files, "model_coefficients") &
                endsWith(model_files, "_aux.csv")]
marginal_effects_files_aux <-
  model_files[startsWith(model_files, "marginal_effects") &
                endsWith(model_files, "_aux.csv")]
descriptives_files_aux <-
  model_files[startsWith(model_files, "descriptives") &
                endsWith(model_files, "_aux.csv")]

## ---- Main analyses ----
## Model info
model_info <- data.frame()
for (file in model_info_files_aux) {
  model_info <- model_info %>%
    bind_rows(read.csv(paste0("est/stayer_analyses_aux/",
                              file)))
}

model_info <- model_info %>%
  dplyr::select(-X) %>%
  arrange(model_id)

model_info %>%
  write.csv("est/stayer_analyses_model_info_aux.csv")


## Model coefficients
model_coefficients <- data.frame()
for (file in model_coefficients_files_aux) {
  model_coefficients <- model_coefficients %>%
    bind_rows(read.csv(paste0("est/stayer_analyses_aux/",
                              file)))
}

model_coefficients <- model_coefficients %>%
  dplyr::select(-X) %>%
  arrange(model_id)

model_coefficients %>%
  write.csv("est/stayer_analyses_model_coefficients_aux.csv")

## Marginal effects
marginal_effects <- data.frame()
for (file in marginal_effects_files_aux) {
  marginal_effects <- marginal_effects %>%
    bind_rows(read.csv(paste0("est/stayer_analyses_aux/",
                              file)))
}

marginal_effects <- marginal_effects %>%
  dplyr::select(-X) %>%
  arrange(model_id)

marginal_effects %>%
  write.csv("est/stayer_analyses_marginal_effects_aux.csv")

## Descriptives
descriptives <- data.frame()
for (file in descriptives_files_aux) {
  descriptives <- descriptives %>%
    bind_rows(read.csv(paste0("est/stayer_analyses_aux/",
                              file)) %>%
                mutate(model_id = readr::parse_number(file)))
}

descriptives <- descriptives %>%
  dplyr::select(-X) %>%
  arrange(model_id)

descriptives %>%
  write.csv("est/stayer_analyses_descriptives_aux.csv")